##  data analysis '53 paper 
##  binary cutoffs results table
rm(list = ls())

library(readxl)
library(multiwayvcov)
library(lmtest)
library(Hmisc)

setwd("~/Dropbox/Current projects/1953/Replication archive")


## choose signal strength measure to employ
signal <- "lrm_4"

## should municipalities be dropped/recoded to account for possible jamming?
## jamming = km10, km12, km15; adjust = "drop", "min"
jamming <- ""
adjust <- ""

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))


## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100



#########################################################
## model 1 --- RIAS and distance to Berlin; fixed effects
#########################################################						
cutoffs <- seq(	from = quantile(dat$signal, probs = .05),
				to = quantile(dat$signal, probs = .95), length.out = 40)

res <- matrix(NA,3,length(cutoffs))

m <- 1
for(m in 1:length(cutoffs))	{

	dat$cutoff <- dat$signal > cutoffs[m]
	table(dat$cutoff)

	probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							cutoff +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

	summary(probit.out)

	## clustered standard errors
	vcov.cl <- cluster.vcov(probit.out, dat$county)
	temp <- coeftest(probit.out, vcov.cl)


	library(MASS)
	X.pred <- model.matrix(probit.out)
	sims <- 10000

	set.seed(123)
	gamma.tilde <- mvrnorm(n = sims, mu = probit.out$coef, Sigma = vcov.cl)

	## Co
	X.pred[,"cutoffTRUE"] <- 0
	out0 <- apply(pnorm(X.pred %*% t(gamma.tilde)),2,mean)

	## Tr
	X.pred[,"cutoffTRUE"] <- 1
	out1 <- apply(pnorm(X.pred %*% t(gamma.tilde)),2,mean)

	res[1,m] <- mean(out1 - out0)
	res[2,m] <- sort(out1 - out0)[(sims*.025)+1]
	res[3,m] <- sort(out1 - out0)[(sims*.975)]

	cat(m, "\n")
	}


plot(cutoffs, res[1,], ylim = c(-.15,.15), type = "p",
xlab = "Threshold for RIAS access (dBuV/m)", pch = 19, col = "#A020F066",
cex.axis = 1.2, cex.lab = 1.2, ylab = "Estimated RIAS effect", lwd = 2)

abline(h = 0, col = "#A9A9A966", lwd = 2)

m <- 1
for(m in 1:length(cutoffs))	{

lines(x = c(cutoffs[m], cutoffs[m]), y = c(res[1,m] -.015, res[2,m]), col = "#A020F066")
lines(x = c(cutoffs[m], cutoffs[m]), y = c(res[1,m] +.015, res[3,m]), col = "#A020F066")

							}



